Step 4: Scatter plots of height and weight vs. age
It appears that there is a positive relationship between height and age, and between weight and age: as you get older, you get taller and heavier. It is worth noting that the age axis only goes from 10 years old to 30 years old.
# 95% CI for height: the sample mean plus the 2.5% and 97.5% standard-normal
# quantiles, each scaled by the standard error of the mean.
height_ci <- with(
  z_sample_summary,
  height_mean + qnorm(p = c(0.025, 0.975)) * height_se
)
cat("95% CI for height:", height_ci, "\n")
95% CI for height: 66.80724 69.17562
# 95% CI for weight: the sample mean plus the 2.5% and 97.5% standard-normal
# quantiles, each scaled by the standard error of the mean.
weight_ci <- with(
  z_sample_summary,
  weight_mean + qnorm(p = c(0.025, 0.975)) * weight_se
)
cat("95% CI for weight:", weight_ci, "\n")
95% CI for weight: 138.6779 149.0981
# 95% CI for zombies killed: the sample mean plus the 2.5% and 97.5%
# standard-normal quantiles, each scaled by the standard error of the mean.
kills_ci <- with(
  z_sample_summary,
  zombies_killed_mean + qnorm(p = c(0.025, 0.975)) * zombies_killed_se
)
cat("95% CI for zombies killed:", kills_ci, "\n")
95% CI for zombies killed: 2.539941 3.500059
# 95% CI for years of education: the sample mean plus the 2.5% and 97.5%
# standard-normal quantiles, each scaled by the standard error of the mean.
education_ci <- with(
  z_sample_summary,
  years_of_education_mean +
    qnorm(p = c(0.025, 0.975)) * years_of_education_se
)
cat("95% CI for years of education:", education_ci, "\n")
95% CI for years of education: 2.728455 3.551545
# 95% CI for age: the sample mean plus the 2.5% and 97.5% standard-normal
# quantiles, each scaled by the standard error of the mean.
age_ci <- z_sample_summary$age_mean +
  qnorm(p = c(0.025, 0.975)) * z_sample_summary$age_se
# End the line with a newline, as the other CI chunks do, so that any
# following output starts on a fresh line.
cat("95% CI for age:", age_ci, "\n")
95% CI for age: 19.66584 21.11993
Step 7: Create a sampling distribution and calculate summary statistics for it.
The means and standard deviations of the sampling distribution are similar to those of the single sample.
# Build a sampling distribution of the mean: draw `reps` random samples of
# `sample_size` rows from the numeric columns of `z`, record every column mean,
# then append the means of the initial single sample.
# NOTE(review): no seed is set in this chunk, so the draws differ between runs
# unless a seed is set earlier in the document.
reps <- 199
sample_size <- 50

# Keep only the numeric columns, then drop the identifier column.
zquant <- z[, vapply(z, is.numeric, logical(1))] |>
  select(-id)

# One row per replicate, one column per variable (preallocated).
sample_means <- matrix(NA_real_, nrow = reps, ncol = ncol(zquant))
for (i in seq_len(reps)) {
  sample_data <- zquant[sample(seq_len(nrow(zquant)), sample_size), ]
  sample_means[i, ] <- colMeans(sample_data)
}

sample_means <- as.data.frame(sample_means)
# Assumes the columns of `zquant` come in exactly this order -- TODO confirm.
colnames(sample_means) <- c(
  "height_mean",
  "weight_mean",
  "zombies_killed_mean",
  "years_of_education_mean",
  "age_mean"
)

# Create a df containing the means from the initial, single sample
single_sample <- data.frame(
  height_mean = z_sample_summary$height_mean,
  weight_mean = z_sample_summary$weight_mean,
  zombies_killed_mean = z_sample_summary$zombies_killed_mean,
  years_of_education_mean = z_sample_summary$years_of_education_mean,
  age_mean = z_sample_summary$age_mean
)

# Add that initial sample to the other 199
sample_means <- rbind(sample_means, single_sample)

# Get the mean and standard deviation for each variable
sampling_distribution_summary_stats <- data.frame(
  mean = colMeans(sample_means),
  sd = vapply(sample_means, sd, numeric(1))
)
print(sampling_distribution_summary_stats)
Based on the histograms, all of the variables appear to have roughly normally distributed sampling distributions. This includes the ‘zombies_killed’ and ‘years_of_education’ variables which previously did not have normal distributions.
# Years of education mean
# Histogram of the years-of-education column of `sample_means`.
# The call is left exactly as written: hist() builds its default title and
# x-axis label from the argument expression.
hist(sample_means$years_of_education_mean)
# Age mean
# Histogram of the age column of `sample_means`.
# The call is left exactly as written: hist() builds its default title and
# x-axis label from the argument expression.
hist(sample_means$age_mean)
Step 8b: Q-Q plots for the sampling distribution
As with the histograms, all of the variables appear to have roughly normally distributed sampling distributions. This includes the ‘zombies_killed’ and ‘years_of_education’ variables which previously did not have normal distributions.
# Height: normal Q-Q plot of the height means, plus a reference line drawn
# in the report's accent colour.
with(sample_means, {
  qqnorm(height_mean, main = "Height mean")
  qqline(height_mean, col = "#bf5700")
})
# Weight: normal Q-Q plot of the weight means, plus a reference line drawn
# in the report's accent colour.
with(sample_means, {
  qqnorm(weight_mean, main = "Weight mean")
  qqline(weight_mean, col = "#bf5700")
})
# Zombies killed: normal Q-Q plot of the zombies-killed means, plus a
# reference line drawn in the report's accent colour.
with(sample_means, {
  qqnorm(zombies_killed_mean, main = "Zombies Killed mean")
  qqline(zombies_killed_mean, col = "#bf5700")
})
# Years of education: normal Q-Q plot of the years-of-education means, plus
# a reference line drawn in the report's accent colour.
with(sample_means, {
  qqnorm(years_of_education_mean, main = "Years of Education mean")
  qqline(years_of_education_mean, col = "#bf5700")
})
# Age: normal Q-Q plot of the age means, plus a reference line drawn in the
# report's accent colour.
with(sample_means, {
  qqnorm(age_mean, main = "Age mean")
  qqline(age_mean, col = "#bf5700")
})
Step 9: Confidence Intervals of Sampling Distribution
The confidence intervals generated here are “narrower” than the confidence intervals generated previously (e.g., in Step 7), with the lower (2.5%) bound closer to the upper (97.5%) bound than in those earlier intervals.
The confidence intervals generated by bootstrapping are “narrower” than the confidence intervals generated in Step 9, with the lower (2.5%) bound closer to the upper (97.5%) bound.
# Bootstrap percentile interval for the mean of the height sampling
# distribution.
n_boot <- 1000 # number of bootstrap resamples
n <- length(sample_means$height_mean) # each resample is as large as the data

# Preallocate one slot per resample instead of growing the vector in the loop.
boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
  # Resample with replacement and keep the mean of the resample.
  boot[i] <- mean(sample(sample_means$height_mean, n, replace = TRUE))
}

# 2.5% and 97.5% quantiles of the bootstrap means.
quantile(boot, probs = c(0.025, 0.975))
2.5% 97.5%
67.55456 67.71542
# Bootstrap percentile interval for the mean of the weight sampling
# distribution. Uses `n_boot` and `n` as set by the earlier bootstrap chunk.
# Preallocate one slot per resample instead of growing the vector in the loop.
boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
  # Resample with replacement and keep the mean of the resample.
  boot[i] <- mean(sample(sample_means$weight_mean, n, replace = TRUE))
}

# 2.5% and 97.5% quantiles of the bootstrap means.
quantile(boot, probs = c(0.025, 0.975))
2.5% 97.5%
143.5943 144.3163
# Bootstrap percentile interval for the mean of the zombies-killed sampling
# distribution. Uses `n_boot` and `n` as set by the earlier bootstrap chunk.
# Preallocate one slot per resample instead of growing the vector in the loop.
boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
  # Resample with replacement and keep the mean of the resample.
  boot[i] <- mean(sample(sample_means$zombies_killed_mean, n, replace = TRUE))
}

# 2.5% and 97.5% quantiles of the bootstrap means.
quantile(boot, probs = c(0.025, 0.975))
2.5% 97.5%
2.962098 3.033403
# Bootstrap percentile interval for the mean of the years-of-education
# sampling distribution. Uses `n_boot` and `n` as set by the earlier
# bootstrap chunk.
# Preallocate one slot per resample instead of growing the vector in the loop.
boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
  # Resample with replacement and keep the mean of the resample.
  boot[i] <- mean(
    sample(sample_means$years_of_education_mean, n, replace = TRUE)
  )
}

# 2.5% and 97.5% quantiles of the bootstrap means.
quantile(boot, probs = c(0.025, 0.975))
2.5% 97.5%
2.977495 3.042323
# Bootstrap percentile interval for the mean of the age sampling
# distribution. Uses `n` as set by the earlier bootstrap chunk.
n_boot <- 1000 # number of bootstrap resamples

# Preallocate one slot per resample instead of growing the vector in the loop.
boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
  # Resample with replacement and keep the mean of the resample.
  boot[i] <- mean(sample(sample_means$age_mean, n, replace = TRUE))
}

# 2.5% and 97.5% quantiles of the bootstrap means.
quantile(boot, probs = c(0.025, 0.975))